Project

Mapping of Ki67 interactions with the genome and comparison with lamina interactions.

Introduction

Various analyses of wildtype cells.

Method

NA

Set-up

Set the parameters and list the data.

# Load dependencies - without warnings / messages
library(tidyverse)
library(GenomicRanges)
library(rtracklayer)
library(ggplot2)
library(RColorBrewer)
library(GGally)
library(corrr)

# Prepare output 
output_dir <- "ts220503_2_ki67_wildtype_cells"
dir.create(output_dir, showWarnings = FALSE)


# Load input
input_dir <- "ts220503_1_data_gathering"

bin_size <- readRDS(file.path(input_dir, "bin_size.rds"))
centromeres <- readRDS(file.path(input_dir, "centromeres.rds"))

colors_set1 <- readRDS(file.path(input_dir, "colors_set1.rds"))
colors_set2 <- readRDS(file.path(input_dir, "colors_set2.rds"))
colors_set3 <- readRDS(file.path(input_dir, "colors_set3.rds"))

tib_padamid_replicates <- readRDS(file.path(input_dir, 
                                            "tib_padamid_replicates.rds"))
tib_padamid_combined <- readRDS(file.path(input_dir, 
                                          "tib_padamid_combined.rds"))

gr_padamid_replicates <- readRDS(file.path(input_dir, 
                                           "gr_padamid_replicates.rds"))
gr_padamid_combined <- readRDS(file.path(input_dir, 
                                         "gr_padamid_combined.rds"))

tib_hmm_replicates <- readRDS(file.path(input_dir, "tib_hmm_replicates.rds"))
tib_hmm_combined <- readRDS(file.path(input_dir, "tib_hmm_combined.rds"))

padamid_metadata_replicates <- readRDS(file.path(input_dir, 
                                                 "padamid_metadata_replicates.rds"))
padamid_metadata_combined <- readRDS(file.path(input_dir, 
                                               "padamid_metadata_combined.rds"))
library(knitr)
opts_chunk$set(fig.width = 10, fig.height = 4, cache = T,
               dev=c('png', 'pdf'), fig.path = file.path(output_dir, "figures/")) 
pdf.options(useDingbats = FALSE)
# From Fede:
# ggpairs custom functions
corColor <- function(data, mapping, color = I("black"), sizeRange = c(1, 3), ...) {

  x   <- eval_data_col(data, mapping$x)
  y   <- eval_data_col(data, mapping$y)
  r   <- cor(x, y)
  rt  <- format(r, digits = 3)
  tt  <- as.character(rt)
  cex <- max(sizeRange)

  # helper function to calculate a useable size
  percent_of_range <- function(percent, range) {
    percent * diff(range) + min(range, na.rm = TRUE)
  }

  # plot correlation coefficient
  p <- ggally_text(label = tt, mapping = aes(), xP = 0.5, yP = 0.5,
                   # size = I(percent_of_range(cex * abs(r), sizeRange)), 
                   size = 6, 
                   color = color, ...) +
    theme(panel.grid.minor=element_blank(),
          panel.grid.major=element_blank())

  corColors <- RColorBrewer::brewer.pal(n = 7, name = "RdYlBu")[2:6]

  if (r <= boundaries[1]) {
    corCol <- corColors[1]
  } else if (r <= boundaries[2]) {
    corCol <- corColors[2]
  } else if (r < boundaries[3]) {
    corCol <- corColors[3]
  } else if (r < boundaries[4]) {
    corCol <- corColors[4]
  } else {
    corCol <- corColors[5]
  }

  p <- p +
    theme(panel.background = element_rect(fill = corCol))

  return(p)
}

customScatter <- function (data, mapping) 
{
    p <- ggplot(data = data, mapping) + 
      geom_bin2d(bins = 100) +
      geom_smooth(method = "lm", se = T, col = "red") +
      scale_fill_gradient(low = "lightgrey", high = "black", name = "Count") +
      theme_bw()
    
    p 
}

CorrelationPerChromosome <- function(tib, ki67, h3k27me3, h3k9me3, lmnb1,
                                     plot_lmnb1 = F) {
  # How does DMSO correlates with histone modifications?
  tib <- tib %>%
    dplyr::select(seqnames, matches(ki67), 
                  matches(h3k27me3), matches(h3k9me3),
                  matches(lmnb1)) %>%
    dplyr::rename_at(2:5, ~ c("ki67", "h3k27me3", "h3k9me3", "lmnb1")) %>%
    drop_na() %>%
    filter(seqnames != "chrY") %>%
    group_by(seqnames) %>%
    dplyr::summarise(h3k27me3 = cor(ki67, h3k27me3, method = "pearson"),
                     h3k9me3 = cor(ki67, h3k9me3, method = "pearson"),
                     lmnb1 = cor(ki67, lmnb1, method = "pearson")) %>%
    ungroup() %>%
    mutate(size = seqlengths(gr_padamid_replicates)[match(seqnames,
                                                          seqlevels(gr_padamid_replicates))]) %>%
    arrange(-size) %>%
    mutate(seqnames = factor(seqnames, levels = unique(seqnames))) %>%
    gather(key, value, contains("h3k"), lmnb1) %>%
    mutate(key = factor(key, c("h3k27me3", "h3k9me3", "lmnb1")))
  
  # Remove lmnb1
  if (plot_lmnb1 == F) {
    tib <- tib %>%
      filter(key != "lmnb1")
  }
    
  # Plot
  plt <- tib %>%
    ggplot(aes(x = seqnames, y = value, col = key)) +
    geom_point() +
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    xlab("") +
    ylab("Pearson") +
    ggtitle(ki67) +
    scale_color_manual(values = RColorBrewer::brewer.pal(4, "Set1")[c(3, 4, 2)]) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  invisible(tib)
  
}

PlotScatter <- function(tib, n1, n2, color_by = NULL, identity = F,
                        n = 1e99, alpha = 0.02) {
  # Get tibble
  tib <- tib %>%
    dplyr::select(seqnames, one_of(n1), one_of(n2)) %>%
    rename_all(~ c("seqnames", "n1", "n2"))
  
  # Prepare color
  if (! is.null(color_by)) {
    tib <- tib %>%
      add_column(color = color_by) %>%
      drop_na()
    alpha = 1
    limits_color <- quantile(tib$color, c(0.001, 0.999), na.rm = T)
    tib$color[tib$color < limits_color[1]] <- limits_color[1]
    tib$color[tib$color > limits_color[2]] <- limits_color[2]
  } else {
    tib <- tib %>% drop_na()
    tib$color = "1"
    #alpha = alpha
  }
  
  tib <- tib %>%
    arrange(sample(1:nrow(.), size = nrow(.), replace = F))
  
  # Plot
  xlimits <- quantile(tib$n1, c(0.001, 0.999), na.rm = T) * 1.4
  ylimits <- quantile(tib$n2, c(0.001, 0.999), na.rm = T) * 1.4
  
  n <- min(nrow(tib), n)
  
  plt <- tib[1:n, ] %>%
    ggplot(aes(x = n1, y = n2, color = color)) +
    geom_point(size = 0.5, alpha = alpha) +
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
    xlab(n1) +
    ylab(n2) +
    ggtitle(paste0("Spearman: ", 
                   round(cor(tib$n1, tib$n2, use = "complete.obs",
                             method = "spearman"), 2))) +
    coord_cartesian(xlim = xlimits, ylim = ylimits) +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  # Prepare color
  if (! is.null(color_by)) {
    plt <- plt +
      scale_color_gradient2(low = "blue", mid = "grey", high = "red",
                            midpoint = 0)
  } else {
    plt <- plt + 
      scale_color_manual(values = "black", guide = F)
  }
  if (identity) plt <- plt + geom_abline(slope = 1, intercept = 0, col = "red", linetype = "dashed")
  
  plot(plt)
  
}


PlotScatterBinned <- function(tib, n1, n2, color_by = NULL, identity = F, 
                              n_min = 10, ylimits_col = c(-2.4, 2.4)) {
  # Get tibble
  tib_process <- tib %>%
    dplyr::select(seqnames, all_of(n1), all_of(n2)) %>%
    rename_all(~ c("seqnames", "n1", "n2"))
  
  if (! is.null(color_by)) {
    tib_process <- tib_process %>%
      add_column(color = color_by)
  }
  
  tib_process <- tib_process %>%
    drop_na()
  
  # Change color range
  if (! is.null(color_by)) {
    limits_color <- quantile(tib_process$color, c(0.001, 0.999), na.rm = T)
    tib_process$color[tib_process$color < limits_color[1]] <- limits_color[1]
    tib_process$color[tib_process$color > limits_color[2]] <- limits_color[2]
  }
  
  # Metrics
  n1_min = min(tib_process$n1) - 0.001
  n1_max = max(tib_process$n1) + 0.001
  n1_binsize <- (n1_max - n1_min) / 49
  
  n2_min = min(tib_process$n2) - 0.001
  n2_max = max(tib_process$n2) + 0.001
  n2_binsize <- (n2_max - n2_min) / 49
  
  tib_summary <- tib_process %>%
    mutate(n1_cut = cut(n1, 
                        seq(n1_min, n1_max, length.out = 50)),
           n2_cut = cut(n2, 
                        seq(n2_min, n2_max, length.out = 50))) %>%
    mutate(n1_bin = as.numeric(as.factor(n1_cut)),
           n2_bin = as.numeric(as.factor(n2_cut))) %>%
    mutate(n1_bin = n1_min - n1_binsize/2 + n1_bin * n1_binsize,
           n2_bin = n2_min - n2_binsize/2 + n2_bin * n2_binsize) %>%
    group_by(n1_bin, n2_bin)
  
  # Plot
  if (! is.null(color_by)) {
    tib_summary <- tib_summary %>%
    dplyr::summarise(n = n(),
                     mark = mean(color)) %>%
    ungroup() %>%
    filter(n >= n_min)
    
    plt <- tib_summary %>%
      ggplot(aes(x = n1_bin, y = n2_bin)) +
      geom_tile(aes(fill = mark)) +
      scale_fill_gradient2(low = "blue", mid = "grey", high = "red",
                           midpoint = 0, limits = ylimits_col, 
                           na.value = "green")
  } else {
    tib_summary <- tib_summary %>%
    dplyr::summarise(n = n()) %>%
    ungroup() %>%
    filter(n >= n_min)
    
    plt <- tib_summary %>%
      ggplot(aes(x = n1_bin, y = n2_bin)) +
      geom_tile(aes(fill = n)) +
      scale_fill_gradient(low = "lightgrey", high = "black", name = "Count",
                          limits = c(0, 400), na.value = "green")
  }
  
  plt <- plt + 
    xlab(n1) +
    ylab(n2) +
    ggtitle(paste0("Pearson: ", 
                   round(cor(tib_process$n1, tib_process$n2, use = "complete.obs",
                             method = "pearson"), 2))) +
    theme_bw() +
    theme(aspect.ratio = 1)
  
  
  plot(plt)
  
}

quantiles <- function(x) {
  # Use quantiles as boxplot boundaries
  r <- quantile(x, probs = c(0.05, 0.25, 0.5, 0.75, 0.95))
  names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
  r
}

1. Enrichment at and near rDNA sequences

First, I want to confirm the enrichment at and near rDNA sequences. I will start by loading the enrichment at rDNA sequences from the pipeline.

# Prepare metadata
padamid_metadata_wildtype <- padamid_metadata_replicates %>%
  filter(experiment == "wildtype" &
           target == "Ki67")

# Load rDNA counts
rdna_counts <- tibble()

for (s in padamid_metadata_wildtype$sample) {
  tmp <- read_tsv(str_interp("results/reports/experiment/${s}/${s}_rDNAEnrichment.txt"))
  rdna_counts <- bind_rows(rdna_counts, tmp)
}

# Calculate enrichment over Dam
rdna_enrich <- rdna_counts %>% 
  dplyr::select(names, fraction) %>%
  mutate(target = str_remove(names, ".*_"),
         exp = str_remove(names, "_[^_]+$")) %>%
  dplyr::select(-names) %>%
  spread(target, fraction) %>%
  mutate(ratio = log2(Ki67 / Dam),
         cell = str_remove(exp, "_.*"),
         cell = factor(cell, levels(padamid_metadata_wildtype$cell)))

# Plot enrichment
rdna_enrich %>%
  ggplot(aes(x = cell, y = ratio, col = cell)) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  xlab("") +
  ylab("rDNA enrichment (log2)") +
  scale_color_manual(values = colors_set2) +
  theme_bw() +
  theme(aspect.ratio = 1)

I want to repeat this for centromeres, as this is almost the same.

# Load centr counts
centr_counts <- tibble()

for (s in padamid_metadata_wildtype$sample) {
  tmp <- read_tsv(str_interp("results/reports/experiment/${s}/${s}_CentromereEnrichment.txt"))
  centr_counts <- bind_rows(centr_counts, tmp)
}

# Calculate enrichment over Dam
centr_enrich <- centr_counts %>% 
  dplyr::select(names, centromere_extended.fraction) %>%
  mutate(target = str_remove(names, ".*_"),
         exp = str_remove(names, "_[^_]+$")) %>%
  dplyr::select(-names) %>%
  spread(target, centromere_extended.fraction) %>%
  mutate(ratio = log2(Ki67 / Dam),
         cell = str_remove(exp, "_.*"),
         cell = factor(cell, levels(padamid_metadata_wildtype$cell)))

# Plot enrichment
centr_enrich %>%
  ggplot(aes(x = cell, y = ratio, col = cell)) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  xlab("") +
  ylab("centromere enrichment (log2)") +
  scale_color_manual(values = colors_set2) +
  theme_bw() +
  theme(aspect.ratio = 1)

These analyses are based on simple counts. I want to do a similar analysis based on the normalized scores themselves.

# Do this for the combined data
padamid_metadata_combined_wildtype <- padamid_metadata_combined %>%
  # Problem: no H3K9me3 data in HCT116 cells - use DMSO condition instead
  mutate(experiment = case_when(sample == "HCT116_ActD_DMSO_H3K9me3" ~ "wildtype",
                                T ~ as.character(experiment)),
         experiment = factor(experiment,
                             levels(padamid_metadata_combined$experiment))) %>%
  filter(experiment == "wildtype")

# Get similar chromosomes for the two objects and filter samples
chromosomes <- c(paste0("chr", 1:22), "chrX")

tib_padamid_wildtype <- tib_padamid_combined %>%
  dplyr::select(seqnames, start, end,
                one_of(padamid_metadata_combined_wildtype$sample)) %>%
  filter(seqnames %in% chromosomes)
centromeres <- centromeres[seqnames(centromeres) %in% chromosomes]

# Add distance to centromeres to the GRanges object - in Mb
dis <- distanceToNearest(as(tib_padamid_wildtype, "GRanges"), centromeres)
tib_padamid_wildtype$distance_to_centromere <- mcols(dis)$distance / 1e6

# # Process
# tib <- tib_padamid_wildtype %>%
#   dplyr::select(-start, -end) %>%
#   mutate(rdna = as.logical(seqnames %in% paste0("chr", c(13, 14, 15, 21, 22)))) %>%
#   gather(key, value, -distance_to_centromere, -rdna, -seqnames) %>%
#   separate(key, c("cell", "condition", "target"), remove = F) %>%
#   mutate(cell = factor(cell, levels = levels(padamid_metadata_combined_wildtype$cell)),
#          dis_group = as.numeric(cut(distance_to_centromere, 
#                                     breaks = seq(0, max(distance_to_centromere) + 1, 
#                                                  by = 1))) - 1)
# 
# 
# # Plot as boxplots
# # First, calculate boxplots to have more control over ylimits
# tib_summary <- tib %>%
#   group_by(cell, dis_group, rdna) %>%
#   drop_na() %>%
#   summarise(ymin = quantile(value, 0.05), 
#             lower = quantile(value, 0.25), 
#             middle = quantile(value, 0.5), 
#             upper = quantile(value, 0.75), 
#             ymax = quantile(value, 0.95))
# 
# tib_summary %>%
#   filter(dis_group <= 30) %>%
#   ggplot(aes(x = dis_group, ymin = ymin, lower = lower, middle = middle, 
#              upper = upper, ymax = ymax, group = dis_group, y = middle)) +
#     geom_boxplot(stat="identity", outlier.shape = NA, fill = "lightgrey", col = "black") +
#     geom_line(aes(y = middle, group = NULL), col = "red") +
#     geom_hline(yintercept = 0, col = "blue", linetype = "dashed") +
#     facet_grid(cell ~ rdna, scales = "free_y") + 
#     xlab("Distance to centromere (Mb)") +
#     ylab("DamID (log2)") +
#     #coord_cartesian(xlim = c(0, 30)) +
#     theme_bw() +
#     theme(aspect.ratio = 1)
# 
# # Is the enrichment specific for rDNA chromosomes, or is chromosome size more 
# # important?
# tib_summary <- tib %>%
#   filter(dis_group < 2) %>%
#   group_by(cell, seqnames) %>%
#   drop_na() %>%
#   summarise(mean = mean(value)) %>%
#   ungroup() %>%
#   mutate(seqnames = factor(seqnames, chromosomes),
#          rdna = seqnames %in% paste0("chr", c(13, 14, 15, 21, 22))) %>%
#   mutate(size = seqlengths(gr_padamid_replicates)[match(seqnames, 
#                                                         seqlevels(gr_padamid_replicates))],
#          size = size / 1e6)
# 
# tib_summary %>%
#   arrange(-size) %>% 
#   mutate(seqnames = factor(seqnames, levels = unique(seqnames))) %>%
#   ggplot(aes(x = seqnames, y = mean, fill = cell, col = rdna)) +
#   geom_bar(stat = "identity") +
#   geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
#   facet_grid(. ~ cell) +
#   xlab("") +
#   ylab("Ki67 score near centromeres") +
#   scale_color_manual(values = c(NA, "black")) +
#   scale_fill_manual(values = colors_set2, guide = F) +
#   theme_bw() +
#   theme(aspect.ratio = 1,
#         axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# 
# # Add chromosome size and make a scatter plot
# tib_summary %>%
#   ggplot(aes(x = size, y = mean, col = rdna)) +
#   geom_point(size = 2) +
#   geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
#   xlab("Chromosome size (Mb)") +
#   ylab("Ki67 score near centromeres (<2Mb)") +
#   scale_color_grey() +
#   facet_grid(. ~ cell) +
#   coord_cartesian(xlim = c(0, 250)) +
#   theme_bw() +
#   theme(aspect.ratio = 1)

Combined, it seems that Ki67 does not bind at rDNA sequences, but is enriched near centromeres. Scores at rDNA are only significantly higher than other small chromosomes in RPE cells.

2. Cell type differences

Next, I want to compare cell types. I will make this comparison based on HMM calls and individual bins.

I should consider making this comparison for the combined values or individual replicates. For now, let’s start with the combined values.

2.1 HMM calls

Not good - do not include. HMM calls are too variable, not robust enough.

Honestly, I think the HMM calls are very poor for the Ki67 tracks. Especially the K562 calls are terrible.

2.2 Normalized values

This should be better than the HMM calls. Simply calculating the correlations between cell types.

# Use GGally to make correlation plots
boundaries <- seq(from = 0.1, to = 0.7, length.out = 4)

# Replicates
tib <- tib_padamid_replicates %>%
  dplyr::select(padamid_metadata_wildtype$sample) %>%
  drop_na()

plt <- ggpairs(tib,
               upper = list(continuous = corColor),
               lower = list(continuous = customScatter),
               diag = list(continuous = function(data, mapping, ...) {
                   ggally_densityDiag(data = data, mapping = mapping, alpha = 0.3, fill = "red") +
                   theme_bw()})) +
  ggtitle("Correlating all data") +
  xlab("") +
  ylab("")

print(plt)

# Combined - between cell lines
tib <- tib_padamid_combined %>%
  dplyr::select(padamid_metadata_combined_wildtype$sample) %>%
  dplyr::select(ends_with("Ki67")) %>%
  drop_na()

plt <- ggpairs(tib,
               upper = list(continuous = corColor),
               lower = list(continuous = customScatter),
               diag = list(continuous = function(data, mapping, ...) {
                   ggally_densityDiag(data = data, mapping = mapping, alpha = 0.3, fill = "red") +
                   theme_bw()})) +
  ggtitle("Correlating all data") +
  xlab("") +
  ylab("")

print(plt)

# Combined - between antibodies
tib <- tib_padamid_combined %>%
  dplyr::select(padamid_metadata_combined_wildtype$sample) %>%
  dplyr::select(contains("Ki67") & contains("HCT116")) %>%
  drop_na()

plt <- ggpairs(tib,
               upper = list(continuous = corColor),
               lower = list(continuous = customScatter),
               diag = list(continuous = function(data, mapping, ...) {
                   ggally_densityDiag(data = data, mapping = mapping, alpha = 0.3, fill = "red") +
                   theme_bw()})) +
  ggtitle("Correlating all data") +
  xlab("") +
  ylab("")

print(plt)

I also want to make these plots for individual chromosomes. Scatterplots are too much, but simple pearson correlations should do the trick.

# Calculate pearson correlations per chromosome
CorrelationsPerChromosome <- function(input_tib, metadata) {
  
  # Gather data and calculate pearson correlations
  tib <- input_tib %>%
    dplyr::select(seqnames, all_of(metadata$sample)) %>%
    drop_na()
  
  # Calculate mean score per chromosome
  tib_summary <- tib %>%
    gather(key, value, -seqnames) %>%
    mutate(cell = str_remove(key, "_.*"),
           cell = factor(cell, levels(metadata$cell)),
           seqnames = factor(seqnames, chromosomes),
           rdna = seqnames %in% paste0("chr", c(13, 14, 15, 21, 22))) %>%
    mutate(size = seqlengths(gr_padamid_replicates)[match(seqnames, 
                                                          seqlevels(gr_padamid_replicates))],
           size = size / 1e6) %>%
    drop_na() 
  
  plt <- tib_summary %>%
    arrange(-size) %>% 
    mutate(seqnames = factor(seqnames, levels = unique(seqnames))) %>%
    ggplot(aes(x = seqnames, y = value, fill = cell, col = rdna)) +
    #geom_boxplot(outlier.shape = NA) +
    stat_summary(fun.data = quantiles, geom = "boxplot") +
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    facet_grid(cell ~ .) +
    xlab("") +
    ylab("Ki67 score") +
    coord_cartesian(ylim = c(-1.3, 1.3)) +
    scale_fill_manual(values = colors_set2, guide = F) +
    scale_color_manual(values = c("grey60", "grey0")) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  plt <- tib_summary %>%
    group_by(cell, size, seqnames) %>%
    summarise(mean = mean(value),
              # q75 = quantile(value, 0.75),
              # q90 = quantile(value, 0.9),
              q95 = quantile(value, 0.95)) %>%
    ungroup() %>%
    gather(key, value, mean, starts_with("q")) %>%
    ggplot(aes(x = size, y = value, col = cell)) +
    geom_point() +
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    facet_grid(key ~ cell) +
    xlab("Chromosome size (Mb)") +
    ylab("Mean Ki67 score") +
    coord_cartesian(xlim = c(0, 250)) +
    scale_color_manual(values = colors_set2, guide = F) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  tib_cor <- tib %>%
    group_by(seqnames) %>%
    nest() %>%
    mutate(cordf = map(data, correlate)) %>%
    unnest(cordf) %>%
    dplyr::select(-data) %>%
    gather(key, value, -seqnames, -term) %>%
    ungroup() %>%
    drop_na() %>%
    mutate(n1 = str_remove(term, "_Ki67"),
           n2 = str_remove(key, "_Ki67"),
           cell1 = str_remove(term, "_.*"),
           cell2 = str_remove(key, "_.*"),
           cell1 = factor(cell1, levels(metadata$cell)),
           cell2 = factor(cell2, levels(metadata$cell))) %>%
    mutate(size = seqlengths(gr_padamid_replicates)[match(seqnames, 
                                                          seqlevels(gr_padamid_replicates))],
           size = size / 1e6) %>%
    drop_na()
  
  # Plot
  plt <- tib_cor %>%
    arrange(-size) %>% 
    mutate(seqnames = factor(seqnames, levels = unique(seqnames))) %>%
    ggplot(aes(x = seqnames, y = value, col = cell2)) +
    geom_point() +
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    facet_grid(. ~ cell1) +
    xlab("") +
    ylab("Pearson correlation") +
    coord_cartesian(ylim = c(min(0, min(tib_cor$value)), 1)) +
    scale_color_manual(values = colors_set2) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  plt <- tib_cor %>%
    ggplot(aes(x = size, y = value, col = cell2)) +
    geom_point() +
    geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
    facet_grid(. ~ cell1) +
    xlab("Chromosome size (Mb)") +
    ylab("Pearson correlation") +
    coord_cartesian(xlim = c(0, 250),
                    ylim = c(min(0, min(tib_cor$value)), 1)) +
    scale_color_manual(values = colors_set2) +
    theme_bw() +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle = 90, hjust = 1))
  plot(plt)
  
  # To determine statistical significance that small chromosomes have a higher 
  # dynamic range, calculate the SD for each chromosome
  # Use a wilcox test to compare chrom size with SD
  tib_sign_perchrom <- tib_summary %>%
    group_by(seqnames, key, cell, size) %>%
    dplyr::summarise(sd = sd(value)) %>%
    group_by(key, cell) %>%
    dplyr::summarise(pvalue = wilcox.test(size, sd)$p.value)
  print("Wilcox test of SD vs chrom size")
  print(tib_sign_perchrom)
  # Result: all significant
  
  # To determine statistical significance per chromosome comparison, use a 
  # wilcox test of chrom size versus pearson correlation
  tib_sign_betweenchrom <- tib_cor %>%
    group_by(cell1, cell2) %>%
    dplyr::summarise(pvalue = wilcox.test(size, value)$p.value)
  print(tib_sign_betweenchrom)
  
}

# Correlations per chromosome
# CorrelationsPerChromosome(tib_padamid_replicates, 
#                           padamid_metadata_wildtype)
CorrelationsPerChromosome(tib_padamid_combined, 
                          padamid_metadata_combined_wildtype %>%
                            filter(target == "Ki67")) 

## [1] "Wilcox test of SD vs chrom size"
## # A tibble: 3 × 3
## # Groups:   key [3]
##   key            cell     pvalue
##   <chr>          <fct>     <dbl>
## 1 HCT116_wt_Ki67 HCT116 2.43e-13
## 2 K562_wt_Ki67   K562   2.43e-13
## 3 RPE_wt_Ki67    RPE    2.43e-13
## # A tibble: 6 × 3
## # Groups:   cell1 [3]
##   cell1  cell2    pvalue
##   <fct>  <fct>     <dbl>
## 1 RPE    HCT116 2.43e-13
## 2 RPE    K562   2.43e-13
## 3 HCT116 RPE    2.43e-13
## 4 HCT116 K562   2.43e-13
## 5 K562   RPE    2.43e-13
## 6 K562   HCT116 2.43e-13

3. Enrichment near centromeres

Are Ki67 interactions enriched near centromeres, and specifically those of rDNA chromosomes?

# Get similar chromosomes for the two objects and filter samples
chromosomes <- unique(tib_padamid_wildtype$seqnames)
tib <- tib_padamid_wildtype
centromeres <- centromeres[seqnames(centromeres) %in% chromosomes]

# Also, prepare telomeres
telomeres <- c(GRanges(seqnames = chromosomes,
                       ranges = IRanges(start = 1, end = 1)),
               GRanges(seqnames = chromosomes,
                       ranges = IRanges(start = seqlengths(gr_padamid_combined)[chromosomes], 
                                        end = seqlengths(gr_padamid_combined)[chromosomes])))
seqinfo(telomeres) <- seqinfo(gr_padamid_combined)
telomeres <- sort(telomeres)

# Add distance to centromeres and telomeres to the GRanges object - in Mb
dis <- distanceToNearest(as(tib, "GRanges"), centromeres)
tib$distance_to_centromere <- mcols(dis)$distance / 1e6

dis <- distanceToNearest(as(tib, "GRanges"), telomeres)
tib$distance_to_telomere <- mcols(dis)$distance / 1e6

# Process
tib <- tib %>%
  dplyr::select(seqnames, 
                contains("distance"), 
                contains("Ki67"),
                -matches("HPA|NOV")) %>%
  mutate(rdna = as.logical(seqnames %in% paste0("chr", c(13, 14, 15, 21, 22)))) %>%
  gather(key, value, -contains("distance"), -rdna, -seqnames) %>%
  separate(key, c("cell", "experiment", "target"), remove = F) %>%
  mutate(match = match(key, padamid_metadata_combined$sample),
         cell = padamid_metadata_combined$cell[match],
         dis_group_centromere = as.numeric(cut(distance_to_centromere, 
                                               breaks = seq(0, max(distance_to_centromere) + 1, 
                                                            by = 0.5)))/2 - 0.5,
         dis_group_telomere = as.numeric(cut(distance_to_centromere, 
                                               breaks = seq(0, max(distance_to_telomere) + 1, 
                                                            by = 0.5)))/2 - 0.5,
         dis_group_telomere = ifelse(dis_group_telomere < 0, 0, dis_group_telomere)) %>%
  dplyr::select(-contains("distance")) %>%
  gather(class, class_value, contains("dis_group")) %>%
  mutate(class = str_remove(class, "dis_group_")) %>%
  drop_na()



# Is the enrichment specific for rDNA chromosomes, or is chromosome size more 
# important?
tib_summary <- tib %>%
  filter(class_value < 2,
         class == "centromere") %>%
  group_by(cell, seqnames, class) %>%
  drop_na() %>%
  summarise(mean = mean(value)) %>%
  ungroup() %>%
  mutate(seqnames = factor(seqnames, chromosomes),
         rdna = seqnames %in% paste0("chr", c(13, 14, 15, 21, 22))) %>%
  drop_na() %>%
  mutate(size = seqlengths(gr_padamid_replicates)[match(seqnames, 
                                                        seqlevels(gr_padamid_replicates))],
         size = size / 1e6)
## `summarise()` has grouped output by 'cell', 'seqnames'. You can override using the `.groups` argument.
tib_summary %>%
  arrange(-size) %>% 
  mutate(seqnames = factor(seqnames, levels = unique(seqnames))) %>%
  ggplot(aes(x = size, y = mean, col = rdna, shape = rdna)) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(cell ~ .) +
  xlab("") +
  ylab("Ki67 score near centromeres") +
  scale_color_manual(values = c("black", "red")) +
  theme_bw() +
  theme(aspect.ratio = 1,
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

tib_summary %>%
  arrange(-size) %>% 
  mutate(seqnames = factor(seqnames, levels = unique(seqnames))) %>%
  ggplot(aes(x = seqnames, y = mean, col = rdna, shape = rdna)) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  facet_grid(cell ~ .) +
  xlab("") +
  ylab("Ki67 score near centromeres") +
  scale_color_manual(values = c("black", "red")) +
  theme_bw() +
  theme(aspect.ratio = 1,
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Plot as boxplots
tib_summary <- tib %>%
  filter(class == "centromere") %>%
  group_by(cell, class_value) %>%
  drop_na() %>%
  summarise(ymin = quantile(value, 0.05),
            lower = quantile(value, 0.25),
            middle = quantile(value, 0.5),
            upper = quantile(value, 0.75),
            ymax = quantile(value, 0.95))
## `summarise()` has grouped output by 'cell'. You can override using the `.groups` argument.
tib_summary %>%
  filter(class_value <= 20) %>%
  ggplot(aes(x = class_value, ymin = ymin, lower = lower, middle = middle,
             upper = upper, ymax = ymax, group = class_value, y = middle)) +
    geom_boxplot(stat="identity", outlier.shape = NA, fill = "lightgrey", col = "black") +
    geom_line(aes(y = middle, group = NULL), col = "red") +
    geom_hline(yintercept = 0, col = "blue", linetype = "dashed") +
    facet_grid(cell ~ .) +
    xlab("Distance to centromere (Mb)") +
    ylab("Ki67") +
    #coord_cartesian(xlim = c(0, 30)) +
    theme_bw() +
    theme(aspect.ratio = 1)

4. Comparisons with histone modifications and NL interactions

Next, compare the Ki67 interactions with histone modifications (H3K27me3 and H3K9me3) and NL interactions.

Note: I have not generated H3K9me3 signal in wildtype HCT116 cells. I will use the DMSO condition instead.

set.seed(999)

tib_cor_rpe <- CorrelationPerChromosome(tib_padamid_wildtype,
                         ki67 = "RPE_wt_Ki67",
                         h3k27me3 = "RPE_wt_H3K27me3",
                         h3k9me3 = "RPE_wt_H3K9me3",
                         lmnb1 = "RPE_wt_LMNB1")

tib_cor_hct116 <- CorrelationPerChromosome(tib_padamid_wildtype,
                         ki67 = "HCT116_wt_Ki67",
                         h3k27me3 = "HCT116_wt_H3K27me3",
                         h3k9me3 = "HCT116_ActD_DMSO_H3K9me3",
                         lmnb1 = "HCT116_wt_LMNB1")

tib_cor_k562 <- CorrelationPerChromosome(tib_padamid_wildtype,
                         ki67 = "K562_wt_Ki67",
                         h3k27me3 = "K562_wt_H3K27me3",
                         h3k9me3 = "K562_wt_H3K9me3",
                         lmnb1 = "K562_wt_LMNB1")

# Make combined plot
bind_rows(tib_cor_rpe %>% 
            add_column(cell = "RPE"),
          tib_cor_hct116 %>% 
            add_column(cell = "HCT116"),
          tib_cor_k562 %>% 
            add_column(cell = "K562")) %>%
  mutate(cell = factor(cell, levels(padamid_metadata_wildtype$cell))) %>%
  ggplot(aes(x = seqnames, y = value, col = key)) +
  geom_point() +
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  xlab("") +
  ylab("Pearson") +
  facet_grid(. ~ cell) +
  scale_color_manual(values = RColorBrewer::brewer.pal(4, 
                                                       "Set1")[c(3, 4, 2)]) +
  theme_bw() +
  theme(aspect.ratio = 1,
        axis.text.x = element_text(angle = 90, hjust = 1))

# Also, some scatter plots
PlotScatter(tib_padamid_wildtype, "RPE_wt_Ki67", "RPE_wt_LMNB1")
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

PlotScatter(tib_padamid_wildtype, "RPE_wt_Ki67", "RPE_wt_LMNB1", 
            color_by = tib_padamid_wildtype$RPE_wt_H3K27me3)

PlotScatter(tib_padamid_wildtype, "RPE_wt_Ki67", "RPE_wt_LMNB1", 
            color_by = tib_padamid_wildtype$RPE_wt_H3K9me3)

PlotScatter(tib_padamid_wildtype, "HCT116_wt_Ki67", "HCT116_wt_LMNB1")
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

PlotScatter(tib_padamid_wildtype, "HCT116_wt_Ki67", "HCT116_wt_LMNB1", 
            color_by = tib_padamid_wildtype$HCT116_wt_H3K27me3)

PlotScatter(tib_padamid_wildtype, "HCT116_wt_Ki67", "HCT116_wt_LMNB1", 
            color_by = tib_padamid_wildtype$HCT116_ActD_DMSO_H3K9me3)

PlotScatter(tib_padamid_wildtype, "K562_wt_Ki67", "K562_wt_LMNB1")
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

PlotScatter(tib_padamid_wildtype, "K562_wt_Ki67", "K562_wt_LMNB1", 
            color_by = tib_padamid_wildtype$K562_wt_H3K27me3)

PlotScatter(tib_padamid_wildtype, "K562_wt_Ki67", "K562_wt_LMNB1", 
            color_by = tib_padamid_wildtype$K562_wt_H3K9me3)

# Now the "binned" scatter plots
PlotScatterBinned(tib_padamid_wildtype, "RPE_wt_Ki67", "RPE_wt_LMNB1")
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_padamid_wildtype, "RPE_wt_Ki67", "RPE_wt_LMNB1", 
                  color_by = tib_padamid_wildtype$RPE_wt_H3K27me3,
                  ylimits_col = c(-2.4, 2.2))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_padamid_wildtype, "RPE_wt_Ki67", "RPE_wt_LMNB1", 
                  color_by = tib_padamid_wildtype$RPE_wt_H3K9me3,
                  ylimits_col = c(-1.8, 1.8))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_padamid_wildtype, "HCT116_wt_Ki67", "HCT116_wt_LMNB1")
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_padamid_wildtype, "HCT116_wt_Ki67", "HCT116_wt_LMNB1", 
                  color_by = tib_padamid_wildtype$HCT116_wt_H3K27me3,
                  ylimits_col = c(-2.4, 2.2))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_padamid_wildtype, "HCT116_wt_Ki67", "HCT116_wt_LMNB1", 
                  color_by = tib_padamid_wildtype$HCT116_ActD_DMSO_H3K9me3,
                  ylimits_col = c(-1.8, 1.8))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_padamid_wildtype, "K562_wt_Ki67", "K562_wt_LMNB1")
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_padamid_wildtype, "K562_wt_Ki67", "K562_wt_LMNB1", 
                  color_by = tib_padamid_wildtype$K562_wt_H3K27me3,
                  ylimits_col = c(-2.4, 2.2))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

PlotScatterBinned(tib_padamid_wildtype, "K562_wt_Ki67", "K562_wt_LMNB1", 
                  color_by = tib_padamid_wildtype$K562_wt_H3K9me3,
                  ylimits_col = c(-1.8, 1.8))
## `summarise()` has grouped output by 'n1_bin'. You can override using the `.groups` argument.

# # Same plots, filtered for LADs
# RPE_lad_idx <- which(tib_hmm_combined$RPE_wt_LMNB1 == "AD")
# PlotScatter(tib_padamid_wildtype[RPE_lad_idx, ], 
#             "RPE_wt_Ki67", "RPE_wt_LMNB1")
# PlotScatter(tib_padamid_wildtype[RPE_lad_idx, ], 
#             "RPE_wt_Ki67", "RPE_wt_LMNB1", 
#             color_by = tib_padamid_wildtype$RPE_wt_H3K27me3[RPE_lad_idx])
# PlotScatter(tib_padamid_wildtype[RPE_lad_idx, ], 
#             "RPE_wt_Ki67", "RPE_wt_LMNB1", 
#             color_by = tib_padamid_wildtype$RPE_wt_H3K9me3[RPE_lad_idx])
# 
# HCT116_lad_idx <- which(tib_hmm_combined$HCT116_wt_LMNB1 == "AD")
# PlotScatter(tib_padamid_wildtype[HCT116_lad_idx, ], 
#             "HCT116_wt_Ki67", "HCT116_wt_LMNB1")
# PlotScatter(tib_padamid_wildtype[HCT116_lad_idx, ], 
#             "HCT116_wt_Ki67", "HCT116_wt_LMNB1", 
#             color_by = tib_padamid_wildtype$HCT116_ActD_DMSO_H3K9me3[HCT116_lad_idx])
# PlotScatter(tib_padamid_wildtype[HCT116_lad_idx, ], 
#             "HCT116_wt_Ki67", "HCT116_wt_LMNB1", 
#             color_by = tib_padamid_wildtype$HCT116_wt_H3K9me3[HCT116_lad_idx])
# 
# K562_lad_idx <- which(tib_hmm_combined$K562_wt_LMNB1 == "AD")
# PlotScatter(tib_padamid_wildtype[K562_lad_idx, ], 
#             "K562_wt_Ki67", "K562_wt_LMNB1")
# PlotScatter(tib_padamid_wildtype[K562_lad_idx, ], 
#             "K562_wt_Ki67", "K562_wt_LMNB1", 
#             color_by = tib_padamid_wildtype$K562_wt_H3K27me3[K562_lad_idx])
# PlotScatter(tib_padamid_wildtype[K562_lad_idx, ], 
#             "K562_wt_Ki67", "K562_wt_LMNB1", 
#             color_by = tib_padamid_wildtype$K562_wt_H3K9me3[K562_lad_idx])

Next, show that the balance between lamina and nucleolus is anti-correlated between the chromosomes.

# Calculate "max" score per chromosome
tib <- tib_padamid_wildtype %>%
  dplyr::select(seqnames, contains("Ki67"), contains("LMNB1"),
                -matches("NOV|HPA")) %>%
  gather(key, value, -seqnames) %>%
  mutate(idx = match(key, padamid_metadata_combined$sample),
         sample = padamid_metadata_combined$sample[idx],
         cell = padamid_metadata_combined$cell[idx],
         target = padamid_metadata_combined$target[idx]) %>%
  group_by(cell, target, seqnames) %>%
  dplyr::summarise(q90 = quantile(value, 0.95, na.rm = T)) %>%
  spread(target, q90)
## `summarise()` has grouped output by 'cell', 'target'. You can override using the `.groups` argument.
# Plot simple scatterplot
tib %>%
  ggplot(aes(x = Ki67, y = LMNB1, col = cell, shape = cell)) +
  geom_point() +
  geom_text(aes(y = LMNB1 + 0.1, label = seqnames),
            col = "black", size = 3) +
  geom_smooth(method = "lm") +
  facet_grid(cell ~ .) +
  scale_color_brewer(palette = "Set2") +
  theme_bw() +
  theme(aspect.ratio = 1)
## `geom_smooth()` using formula 'y ~ x'

tib %>%
  ggplot(aes(x = Ki67, y = LMNB1, col = cell, shape = cell)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_color_brewer(palette = "Set2") +
  theme_bw() +
  theme(aspect.ratio = 1)
## `geom_smooth()` using formula 'y ~ x'

Finally, as requested by reviewer #3, test for significance to support to claim that “both proteins interact with similar domains” (Ki-67 and Lamin B1). For this, use a cross-correlation between the interactions maps.

# Calculate cross correlations
ccf_rpe <- ccf(tib_padamid_wildtype$RPE_wt_Ki67, 
               tib_padamid_wildtype$RPE_wt_LMNB1, 
               na.action = na.pass, lag.max = 200, plot = F)
ccf_hct <- ccf(tib_padamid_wildtype$HCT116_wt_Ki67, 
               tib_padamid_wildtype$HCT116_wt_LMNB1, 
               na.action = na.pass, lag.max = 200, plot = F)
ccf_k562 <- ccf(tib_padamid_wildtype$K562_wt_Ki67, 
                tib_padamid_wildtype$K562_wt_LMNB1, 
                na.action = na.pass, lag.max = 200, plot = F)

# Combine and plot
tib_ccf <- bind_rows(tibble(cell = "RPE",
                            ccf = c(ccf_rpe$acf),
                            lag = c(ccf_rpe$lag)),
                     tibble(cell = "HCT116",
                            ccf = c(ccf_hct$acf),
                            lag = c(ccf_hct$lag)),
                     tibble(cell = "K562",
                            ccf = c(ccf_k562$acf),
                            lag = c(ccf_k562$lag))) %>%
  mutate(cell = factor(cell,
                       levels = levels(padamid_metadata_combined$cell)))

tib_ccf %>%
  ggplot(aes(x = lag * 50 / 1000, y = ccf, col = cell)) +
  geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
  geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
  geom_line() +
  xlab("Distance (Mb)") +
  ylab("CCF") +
  scale_color_manual(values = colors_set2) +
  theme_bw() +
  theme(aspect.ratio = 1)

# Also, calculate significance between the Ki-67 and Lamin B1 tracks
print(cor.test(tib_padamid_wildtype$RPE_wt_Ki67,
               tib_padamid_wildtype$RPE_wt_LMNB1))
## 
##  Pearson's product-moment correlation
## 
## data:  tib_padamid_wildtype$RPE_wt_Ki67 and tib_padamid_wildtype$RPE_wt_LMNB1
## t = 47.512, df = 56619, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1878748 0.2037169
## sample estimates:
##       cor 
## 0.1958086
print(cor.test(tib_padamid_wildtype$HCT116_wt_Ki67,
               tib_padamid_wildtype$HCT116_wt_LMNB1))
## 
##  Pearson's product-moment correlation
## 
## data:  tib_padamid_wildtype$HCT116_wt_Ki67 and tib_padamid_wildtype$HCT116_wt_LMNB1
## t = 190.54, df = 56709, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6197210 0.6297568
## sample estimates:
##       cor 
## 0.6247647
print(cor.test(tib_padamid_wildtype$K562_wt_Ki67,
               tib_padamid_wildtype$K562_wt_LMNB1))
## 
##  Pearson's product-moment correlation
## 
## data:  tib_padamid_wildtype$K562_wt_Ki67 and tib_padamid_wildtype$K562_wt_LMNB1
## t = 148.89, df = 56571, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5246454 0.5364865
## sample estimates:
##       cor 
## 0.5305918

Conclusions

Sevaral conclusions:

  • Ki67 are partially conserved between cell types.
  • Interactions are enriched on small chromosomes.
  • Interactions are enriched near telomeres.
  • There is a local anti-correlation with LaminB1 interactions.
  • Histone modifications and Ki67 / LaminB1 have a cell type dependent correlation.

Session info

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] knitr_1.33           corrr_0.4.3          GGally_2.1.2        
##  [4] RColorBrewer_1.1-2   rtracklayer_1.50.0   GenomicRanges_1.42.0
##  [7] GenomeInfoDb_1.26.7  IRanges_2.24.1       S4Vectors_0.28.1    
## [10] BiocGenerics_0.36.1  forcats_0.5.1        stringr_1.4.0       
## [13] dplyr_1.0.7          purrr_0.3.4          readr_2.0.0         
## [16] tidyr_1.1.3          tibble_3.1.6         ggplot2_3.3.5       
## [19] tidyverse_1.3.1     
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-152                bitops_1.0-7               
##  [3] matrixStats_0.60.0          fs_1.5.0                   
##  [5] bit64_4.0.5                 lubridate_1.7.10           
##  [7] httr_1.4.2                  tools_4.0.5                
##  [9] backports_1.2.1             bslib_0.2.5.1              
## [11] utf8_1.2.2                  R6_2.5.1                   
## [13] mgcv_1.8-36                 DBI_1.1.1                  
## [15] colorspace_2.0-2            withr_2.4.2                
## [17] tidyselect_1.1.1            bit_4.0.4                  
## [19] compiler_4.0.5              cli_3.1.0                  
## [21] rvest_1.0.1                 Biobase_2.50.0             
## [23] xml2_1.3.2                  DelayedArray_0.16.3        
## [25] labeling_0.4.2              sass_0.4.0                 
## [27] scales_1.1.1                digest_0.6.28              
## [29] Rsamtools_2.6.0             rmarkdown_2.11             
## [31] XVector_0.30.0              pkgconfig_2.0.3            
## [33] htmltools_0.5.1.1           MatrixGenerics_1.2.1       
## [35] highr_0.9                   dbplyr_2.1.1               
## [37] rlang_0.4.12                readxl_1.3.1               
## [39] rstudioapi_0.13             farver_2.1.0               
## [41] jquerylib_0.1.4             generics_0.1.0             
## [43] jsonlite_1.7.2              vroom_1.5.3                
## [45] BiocParallel_1.24.1         RCurl_1.98-1.3             
## [47] magrittr_2.0.1              GenomeInfoDbData_1.2.4     
## [49] Matrix_1.3-4                Rcpp_1.0.7                 
## [51] munsell_0.5.0               fansi_0.5.0                
## [53] lifecycle_1.0.1             stringi_1.7.3              
## [55] yaml_2.2.1                  SummarizedExperiment_1.20.0
## [57] zlibbioc_1.36.0             plyr_1.8.6                 
## [59] grid_4.0.5                  crayon_1.4.2               
## [61] lattice_0.20-44             splines_4.0.5              
## [63] Biostrings_2.58.0           haven_2.4.1                
## [65] hms_1.1.0                   pillar_1.6.4               
## [67] codetools_0.2-18            reprex_2.0.0               
## [69] XML_3.99-0.6                glue_1.5.0                 
## [71] evaluate_0.14               modelr_0.1.8               
## [73] vctrs_0.3.8                 tzdb_0.1.2                 
## [75] cellranger_1.1.0            gtable_0.3.0               
## [77] reshape_0.8.8               assertthat_0.2.1           
## [79] xfun_0.24                   broom_0.7.9                
## [81] GenomicAlignments_1.26.0    ellipsis_0.3.2